# -------------------------------------------------------------------
# Purpose: Creates Table B48
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Abort early unless both project directories exist: the one the data file is
# read from and the one the table is written to.
stopifnot(
  dir.exists(pdataconfanalysis),
  dir.exists(poutputappendix)
)


# Load the patent-level data; the .RData file is expected to provide the
# object `patentLevel19001940`. Keep only rows with more than one inventor.
patent_file <- file.path(pdataconfanalysis, "patentLevel19001940.RData")
load(patent_file)
d <- patentLevel19001940[no_of_inventors > 1]

## Winsorize and normalize ------------------------------------------------
# Drop any columns already ending in "_ws" or "_s" so they are rebuilt below
# on the current sample. `:=` modifies `d` by reference.
stale_ws <- grep("_ws$", names(d), value = TRUE)
d[, (stale_ws) := NULL]

stale_s <- grep("_s$", names(d), value = TRUE)
d[, (stale_s) := NULL]

# Winsorize every column whose name mentions one of the diversity measures
# (probs = c(.01, .99)); results are stored as "<name>_w".
diversity_pattern <- "entropy|evenness|relgendist"
raw_vars <- grep(diversity_pattern, names(d), value = TRUE)
d[, paste0(raw_vars, "_w") := lapply(.SD, winsorize, probs = c(.01, .99)), .SDcols = raw_vars]

# Standardize the winsorized columns with scale(); appending "s" to
# "<name>_w" yields "<name>_ws".
winsorized_vars <- grep("_w$", grep(diversity_pattern, names(d), value = TRUE), value = TRUE)
d[, paste0(winsorized_vars, "s") := lapply(.SD, scale), .SDcols = winsorized_vars]

# Standardize the log distinct-surname count column(s); stored as "<name>_s".
count_vars <- grep("log_no_of_patent_namelast_mp_adjp", names(d), value = TRUE)
d[, paste0(count_vars, "_s") := lapply(.SD, scale), .SDcols = count_vars]



# Regressions
# Three families of models are estimated on the same grid of eight
# specifications (four patent-level outcomes, each without and with
# no_of_inventors fixed effects):
#   o: least squares of the outcome on surname diversity
#   r: reduced form of the outcome on the instrument
#   i: IV, with surname diversity instrumented by the instrument
# All models absorb gisjoin_1900, statefip^year and tech_category^year fixed
# effects and are weighted by d$wgt.
outcomes <- c(
  "entropy_patent_namelast_mp_adjp_ws",
  "log_no_of_patent_namelast_mp_adjp_s",
  "evenness_patent_namelast_mp_adjp_ws",
  "entropy_patent_namelast_mp_relgendist_max_adjp_ws"
)
fe_base <- "gisjoin_1900 + statefip^year + tech_category^year"
fe_specs <- c(fe_base, paste("no_of_inventors +", fe_base))
endog <- "entropy_namelast_mp_adjp_ws"
instrument <- "iv_lo_entropy_namelast_mp_adjp_fe_immig_w"

# expand.grid() varies its first factor fastest, so the models are ordered
# outcome by outcome, with the specification without no_of_inventors first.
# The table code relies on this column order.
specs <- expand.grid(fe = fe_specs, y = outcomes, stringsAsFactors = FALSE)
n_specs <- nrow(specs)

# Preallocate instead of growing the lists with append().
o <- vector("list", n_specs)
r <- vector("list", n_specs)
i <- vector("list", n_specs)

for (k in seq_len(n_specs)) {
  y <- specs$y[k]
  fe <- specs$fe[k]

  # `weights` is spelled out in full; the previous `weight =` only worked
  # through partial argument matching.
  o[[k]] <- feols(
    as.formula(paste(y, "~", endog, "|", fe)),
    d,
    weights = d$wgt
  )
  r[[k]] <- feols(
    as.formula(paste(y, "~", instrument, "|", fe)),
    d,
    weights = d$wgt
  )
  # fixest IV syntax: outcome ~ exogenous | fixed effects | endogenous ~ instrument
  i[[k]] <- feols(
    as.formula(paste(y, "~ 1 |", fe, "|", endog, "~", instrument)),
    d,
    weights = d$wgt
  )
}


# Create table
# Display labels used by etable(). Dependent-variable headers are wrapped in
# \makecell so that they can break across several lines.
regressor_labels <- c(
  entropy_namelast_mp_adjp_ws = "Surname diversity",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_w = "Predicted surname diversity"
)
depvar_labels <- c(
  entropy_patent_namelast_mp_adjp_ws = "\\makecell{Surname diversity \\\\ of patent}",
  log_no_of_patent_namelast_mp_adjp_s = "\\makecell{Log distinct \\\\ surnames of patent}",
  evenness_patent_namelast_mp_adjp_ws = "\\makecell{Surname dispersion \\\\ of patent}",
  entropy_patent_namelast_mp_relgendist_max_adjp_ws = "\\makecell{Genetic distance \\\\ weighted surname \\\\ diversity of patent}"
)
fixef_labels <- c(
  year = "Period",
  statefip = "State",
  namelast_mp = "Surname",
  gisjoin_1900 = "County",
  main_patentclass = "Patent technology class",
  tech_category = "Patent technology field",
  no_of_inventors = "Team size"
)

setFixest_dict(c(regressor_labels, depvar_labels, fixef_labels))


# Panel A: export the least-squares models to the final table file, then add
# the panel titles and column rules with the project's table helpers
# (add_table_row / move_table_row are defined outside this script).
tablename <- file.path(poutputappendix, "tableB48.tex")
etable(
  o,
  cluster = ~gisjoin_1900,
  fitstat = ~n,
  digits = "r3",
  digits.stats = "r3",
  file = tablename,
  replace = TRUE,
  tex = TRUE,
  style.tex = style.tex("aer")
)

# Title row of a panel: a two-column left-aligned cell holding the title,
# followed by an empty cell spanning the remaining seven columns.
panel_title <- function(title) {
  paste0("\\multicolumn{2}{l}{\\textit{", title, "}} &  \\multicolumn{7}{c}{}\\\\")
}

panel_a_title <- paste0(panel_title("Panel A: Least-squares estimates"), " \\cmidrule(lr){1-9}")
add_table_row(tablename, "\\midrule", panel_a_title)

# One rule per pair of model columns, anchored on the row matching "patent".
column_pair_rules <- "\\cmidrule(lr){2-3} \\cmidrule(lr){4-5}  \\cmidrule(lr){6-7} \\cmidrule(lr){8-9}"
add_table_row(tablename, "patent", column_pair_rules)

move_table_row(tablename, "Observations", "bottomrule")

# Title rows for the remaining panels; the later sections of the script anchor
# their rows on the "Panel B" / "Panel C" / "Panel D" text.
later_panel_titles <- c(
  panel_title("Panel B: Reduced-form estimates"),
  "\\\\",
  panel_title("Panel C: Instrumental-variable estimates"),
  "\\\\",
  panel_title("Panel D: First-stage estimates")
)
add_table_row(tablename, "    \\\\", later_panel_titles)

# Panel B: export the reduced-form models to a scratch file, pull out the
# coefficient rows and add them to the final table at the "Panel B" row.
temptable <- file.path(poutputappendix, "temp.tex")
etable(
  r,
  cluster = ~gisjoin_1900,
  fitstat = ~n,
  digits = "r3",
  digits.stats = "r3",
  file = temptable,
  replace = TRUE,
  tex = TRUE,
  style.tex = style.tex("aer")
)
estimates_rows <- get_estimates_rows(temptable)
panel_b_rows <- c("\\cmidrule(lr){1-9}", estimates_rows)
add_table_row(tablename, "Panel B", panel_b_rows)

# Panel C: export the IV models (second stage) with the first-stage Wald
# statistic as the only fit statistic, then add the coefficient rows and the
# statistic row to the final table at the "Panel C" row.
temptable <- file.path(poutputappendix, "temp.tex")
etable(
  i,
  cluster = ~gisjoin_1900,
  fitstat = ~ivwald,
  digits = "r3",
  digits.stats = "r0",
  file = temptable,
  replace = TRUE,
  tex = TRUE,
  style.tex = style.tex("aer")
)
estimates_rows <- get_estimates_rows(temptable)
fstat_row <- get_table_row(temptable, "Wald")
panel_c_rows <- c("\\cmidrule(lr){1-9}", estimates_rows, "\\\\", fstat_row)
add_table_row(tablename, "Panel C", panel_c_rows)

# Relabel the statistic row in the final table.
edit_table_content_fixed(
  tablename,
  "Wald (1st stage), Surname diversity",
  "Sanderson-Windmeijer \\textit{F}-stat"
)

# Panel D: export the first stage of the IV models to the scratch file.
temptable <- file.path(poutputappendix, "temp.tex")
etable(
  i,
  stage = 1,
  cluster = ~gisjoin_1900,
  fitstat = ~n,
  digits = "r3",
  digits.stats = "r3",
  file = temptable,
  replace = TRUE,
  tex = TRUE,
  style.tex = style.tex("aer")
)

# The first stage does not depend on the outcome, so only two of the eight
# columns are kept (presumably one per fixed-effect specification; see the
# project helper collapse_stage1 for the exact meaning of the index vector).
stage1_columns <- c(2, 3, rep(NA, 6))
estimates_rows <- collapse_stage1(get_estimates_rows(temptable), stage1_columns)

# Both calls anchor on the "Panel D" row and must run in this order: the
# coefficient rows first, then the rule and column header.
add_table_row(tablename, "Panel D", estimates_rows)
# NOTE(review): the header reads "Surname diversity of patent", whereas the
# first-stage dependent variable (entropy_namelast_mp_adjp_ws) is labelled
# "Surname diversity" in the fixest dictionary -- confirm this is intended.
panel_d_header <- c(
  "\\cmidrule(lr){1-9}",
  "& \\multicolumn{2}{c}{\\makecell{Surname diversity \\\\ of patent}} &  &  &  &  &  & \\\\",
  "\\cmidrule(lr){2-3}"
)
add_table_row(tablename, "Panel D", panel_d_header)

# Remove the scratch file shared by the panels and report the output path.
file.remove(temptable)

cat("Table B48 saved to:", tablename, "\n")